# Import required libraries
library(miceadds)
library(miceafter)
library(matlib)
library(lmtest)
library("sandwich")
library(lattice)
library(ggplot2)
library(modelsummary)
options(repr.plot.width=10, repr.plot.height=8)
Lade nötiges Paket: mice
Attache Paket: ‘mice’
Das folgende Objekt ist maskiert ‘package:stats’:
filter
Die folgenden Objekte sind maskiert von ‘package:base’:
cbind, rbind
* miceadds 3.13-12 (2022-05-30 15:14:07)
Lade nötiges Paket: zoo
Attache Paket: ‘zoo’
Die folgenden Objekte sind maskiert von ‘package:base’:
as.Date, as.Date.numeric
# Set work directory
setwd("/Users/milomeyberg/Documents/Immonet_Oktober2/immonet")
# Load mids object
load("mice_imp_ohne_flux_final/mice_imp_ohne_flux_final.Rdata")
# Generate list of completed data.frames
dfs<- complete(get("mi.res"), action='long', include=TRUE)
# Add squared and cubic term of the living area
dfs$WOHNFLAECHE_SQUARED <- dfs$WOHNFLAECHE^2
dfs$WOHNFLAECHE_CUBIC <- dfs$WOHNFLAECHE^3
# Utility Costs
dfs$NEBENKOSTEN_SQUARED <- dfs$NEBENKOSTEN_M2^2
# Scale the year of construction back
dfs$BAUJAHR_NOT_ROUNDED <- ((dfs$BAUJAHR*1000)-2000)
dfs$BAUJAHR_SQUARED_NOT_ROUNDED <- ((dfs$BAUJAHR^2)-(2000^2))
dfs$BAUJAHR <- (round(dfs$BAUJAHR*1000)-2000)
dfs$BAUJAHR_SQUARED <- ((dfs$BAUJAHR^2))
#Turn back to mids
dfs <- as.mids(dfs)
# Formatting outputs for model summary table
glance_custom.mipo <- function(x, ...) {
ret <- as.data.frame(t(apply((x$glanced[c(1:2,5,8:9)]),MARGIN=2,FUN=mean)))
ret
}
# Generate models for each imputed dataset
ra0 <- with(dfs, expr = lm(KALTMIETE_M2 ~ NEBENKOSTEN_M2 + NEBENKOSTEN_SQUARED + HEIZUNGSKOSTEN_M2 + ZIMMER +
+ WOHNFLAECHE + WOHNFLAECHE_SQUARED + WOHNFLAECHE_CUBIC + BAUJAHR + BAUJAHR_SQUARED + Zentralheizung
+ Gas + Fernwärme))
ra1 <- with(dfs, expr = lm(KALTMIETE_M2 ~ NEBENKOSTEN_M2 + NEBENKOSTEN_SQUARED + HEIZUNGSKOSTEN_M2
+ WOHNFLAECHE + WOHNFLAECHE_SQUARED + WOHNFLAECHE_CUBIC + BAUJAHR + BAUJAHR_SQUARED + Zentralheizung
+ Gas))
ra <- with(dfs, expr = lm(KALTMIETE_M2 ~ NEBENKOSTEN_M2 + NEBENKOSTEN_SQUARED
+ WOHNFLAECHE + WOHNFLAECHE_SQUARED + WOHNFLAECHE_CUBIC + BAUJAHR + BAUJAHR_SQUARED + Zentralheizung
+ Gas))
ra2 <- with(dfs, expr = lm(KALTMIETE ~ NEBENKOSTEN + NEBENKOSTEN_SQUARED
+ WOHNFLAECHE + WOHNFLAECHE_SQUARED + WOHNFLAECHE_CUBIC + Neubau + Zentralheizung
+ Gas))
# Build a list of models
models <- list( pool(ra0),pool(ra1),pool(ra))
# Generates model summary table
modelsummary(models, fmt = "%.5f",estimate = "{estimate}{stars}", coef_rename = c("NEBENKOSTEN_M2" = "Utility Costs",
"NEBENKOSTEN_SQUARED"="Utility Costs Squared",
"HEIZUNGSKOSTEN_M2" = "Heating Costs",
"ZIMMER" = "Rooms Number","WOHNFLAECHE" = "Living Area",
"WOHNFLAECHE_SQUARED" = "Living Area Squared",
"WOHNFLAECHE_CUBIC" = "Cubic Living Area",
"BAUJAHR" = "Construction Year",
"BAUJAHR_SQUARED" = "Construcion Year Squared",
"Zentralheizung1" = "Central Heating",
"Gas1" = "Gas", "Fernwärme1"= "District Heating"),output = 'jupyter')
| Model 1 | Model 2 | Model 3 | |
|---|---|---|---|
| (Intercept) | 19.40555*** | 19.08611*** | 19.12490*** |
| (1.42538) | (1.36730) | (1.17332) | |
| Utility Costs | −0.73736+ | −0.71533+ | −0.71293+ |
| (0.40889) | (0.40609) | (0.41462) | |
| Utility Costs Squared | 0.29515*** | 0.29225*** | 0.29236*** |
| (0.06294) | (0.06246) | (0.06179) | |
| Heating Costs | 0.01158 | 0.02602 | |
| (0.24903) | (0.24929) | ||
| Rooms Number | −0.02126 | ||
| (0.17710) | |||
| Living Area | −0.20274*** | −0.20408*** | −0.20430*** |
| (0.03713) | (0.03534) | (0.03456) | |
| Living Area Squared | 0.00225*** | 0.00226*** | 0.00226*** |
| (0.00039) | (0.00038) | (0.00038) | |
| Cubic Living Area | −0.00001*** | −0.00001*** | −0.00001*** |
| (0.00000) | (0.00000) | (0.00000) | |
| Construction Year | 0.13549*** | 0.13456*** | 0.13456*** |
| (0.00580) | (0.00586) | (0.00588) | |
| Construcion Year Squared | 0.00146*** | 0.00145*** | 0.00145*** |
| (0.00006) | (0.00006) | (0.00006) | |
| Central Heating | −1.35902* | −1.35287* | −1.34914* |
| (0.41294) | (0.41826) | (0.41524) | |
| Gas | 0.25570 | 0.48428* | 0.48573* |
| (0.32373) | (0.22319) | (0.21819) | |
| District Heating | −0.32036 | ||
| (0.29879) | |||
| Num.Obs. | 2950 | 2950 | 2950 |
| Num.Imp. | 5 | 5 | 5 |
| R2 | 0.320 | 0.320 | 0.319 |
| R2 Adj. | 0.317 | 0.317 | 0.317 |
| AIC | 17503.3 | 17501.3 | 17501.2 |
| BIC | 17587.2 | 17573.2 | 17567.0 |
# Print the p-value of the overall models
print(paste("Model 1: ",glance_custom.mipo(models[[1]])$p.value))
print(paste("Model 2: ",glance_custom.mipo(models[[2]])$p.value))
print(paste("Model 3: ",glance_custom.mipo(models[[3]])$p.value))
[1] "Model 1: 2.20333948809292e-229" [1] "Model 2: 1.74622816600423e-231" [1] "Model 3: 2.17637295292107e-232"
# Pooling of the selected model
poolm <- pool(ra)
# -------- Deriving the full variance covariance matrix ------------------
# -------(Taken from a Thread of https://stackoverflow.com)---------------
# m different imputations
m <- poolm$m
# Vw, the within imputation covariance matrix
vw <- Reduce("+", lapply(ra$analyses, vcov)) / (m)
# Vb, the between imputation covariance matrix
# mice only provides the diagonal elements
bdiag <- poolm$pooled$b
# Calculate the full Vb matrix by hand
# Qbar, pooled parameter estimates
qbar <- getqbar(poolm)
# Qhats, each imputations parameter estimates
qhats <- sapply(ra$analyses, coef)
vb <- (1 / (m-1)) * (qhats - qbar) %*% t(qhats - qbar)
vt <- vw + (1 + 1 / (m)) * vb # this is t as it used to be
# Checking against the diagonal of t that is still provided
all.equal(as.numeric(diag(vt)), poolm$pooled$t)
# Convert covariance matrix to correlation matrix
corr <- solve(diag(sqrt(diag(vt)))) %*% vt %*% solve(diag(sqrt(diag(vt))))
corr_2 <- cov2cor(vt)
# Corelation matrix of betha
corr_2
| (Intercept) | NEBENKOSTEN_M2 | NEBENKOSTEN_SQUARED | WOHNFLAECHE | WOHNFLAECHE_SQUARED | WOHNFLAECHE_CUBIC | BAUJAHR | BAUJAHR_SQUARED | Zentralheizung1 | Gas1 | |
|---|---|---|---|---|---|---|---|---|---|---|
| (Intercept) | 1.00000000 | -0.54908070 | 0.53507033 | -0.83604381 | 0.768735331 | -0.710737552 | 0.16217464 | 0.085159263 | 0.0866590 | -0.025662787 |
| NEBENKOSTEN_M2 | -0.54908070 | 1.00000000 | -0.96781602 | 0.12574763 | -0.096174855 | 0.088883890 | -0.33365539 | -0.190617526 | -0.4081307 | -0.187689625 |
| NEBENKOSTEN_SQUARED | 0.53507033 | -0.96781602 | 1.00000000 | -0.12995706 | 0.100751509 | -0.096018623 | 0.26936047 | 0.164507065 | 0.3063020 | 0.167908072 |
| WOHNFLAECHE | -0.83604381 | 0.12574763 | -0.12995706 | 1.00000000 | -0.978940358 | 0.933850390 | -0.09023771 | -0.045955357 | -0.1269911 | -0.024752816 |
| WOHNFLAECHE_SQUARED | 0.76873533 | -0.09617486 | 0.10075151 | -0.97894036 | 1.000000000 | -0.984328676 | 0.04858789 | 0.008966544 | 0.1242373 | 0.007872795 |
| WOHNFLAECHE_CUBIC | -0.71073755 | 0.08888389 | -0.09601862 | 0.93385039 | -0.984328676 | 1.000000000 | -0.03057400 | -0.001389355 | -0.1135257 | -0.002507977 |
| BAUJAHR | 0.16217464 | -0.33365539 | 0.26936047 | -0.09023771 | 0.048587892 | -0.030574003 | 1.00000000 | 0.872085220 | 0.5342747 | 0.287987085 |
| BAUJAHR_SQUARED | 0.08515926 | -0.19061753 | 0.16450706 | -0.04595536 | 0.008966544 | -0.001389355 | 0.87208522 | 1.000000000 | 0.2954733 | 0.094674245 |
| Zentralheizung1 | 0.08665900 | -0.40813070 | 0.30630202 | -0.12699108 | 0.124237252 | -0.113525681 | 0.53427467 | 0.295473310 | 1.0000000 | 0.437672310 |
| Gas1 | -0.02566279 | -0.18768962 | 0.16790807 | -0.02475282 | 0.007872795 | -0.002507977 | 0.28798708 | 0.094674245 | 0.4376723 | 1.000000000 |
# Generate Residuals and Effect Plots
dfs_1<- lapply(1:5, function(i) complete(dfs, action = i))
for (i in 1:5){
model<- lm(data=dfs_1[[i]],KALTMIETE_M2 ~ NEBENKOSTEN_M2
+ WOHNFLAECHE + WOHNFLAECHE_SQUARED + WOHNFLAECHE_CUBIC + BAUJAHR + BAUJAHR_SQUARED + Zentralheizung
+ Gas)
res <- resid(model)
dfs_1[[i]]$fit <- fitted(model)
#dfs_1[[i]]$res <- rstandard(model)
dfs_1[[i]]$Effect <- model$coefficients["(Intercept)"] + model$coefficients["WOHNFLAECHE"]*dfs_1[[i]]$WOHNFLAECHE + model$coefficients["WOHNFLAECHE_SQUARED"]* (dfs_1[[i]]$WOHNFLAECHE_SQUARED) + model$coefficients["WOHNFLAECHE_CUBIC"]*(dfs_1[[i]]$WOHNFLAECHE_CUBIC)
dfs_1[[i]]$Effect_2 <- model$coefficients["(Intercept)"] + model$coefficients["BAUJAHR"]*dfs_1[[i]]$BAUJAHR + model$coefficients["BAUJAHR_SQUARED"]* (dfs_1[[i]]$BAUJAHR_SQUARED)
#produce residual vs. fitted plot
print(ggplot(data.frame(fitted(model),res),aes(fitted(model),res)) +
geom_point(alpha=0.6) + stat_smooth(method='loess')+ theme_bw()+
xlab('Predicted Values') + ylab('Residuals') +
theme(axis.title = element_text(face="bold")))
#plot effect of the living area
print(qplot(WOHNFLAECHE, Effect,data = dfs_1[[i]],
geom = c("point")) + theme_bw()
+ ggtitle(substitute(paste(bold('Effect of the Living Area')))) + theme(plot.title = element_text(hjust = 0.5))
+ labs(x= expression(bold("Living Area in") ~bold(m^2)), y=substitute(paste(bold('Effect')))
))
#plot effect of the year of construction
print(qplot(BAUJAHR+2000, Effect_2,data = dfs_1[[i]],
geom = c("point")) + theme_bw()
+ ggtitle(substitute(paste(bold('Effect of the Year of Construction')))) + theme(plot.title = element_text(hjust = 0.5))
+ labs(x= substitute(bold(paste('Year of Construction'))), y=substitute(bold(paste('Effect')))))
print(bptest(model))
}
`geom_smooth()` using formula 'y ~ x'
studentized Breusch-Pagan test data: model BP = 263.72, df = 8, p-value < 2.2e-16
`geom_smooth()` using formula 'y ~ x'
studentized Breusch-Pagan test data: model BP = 247.03, df = 8, p-value < 2.2e-16
`geom_smooth()` using formula 'y ~ x'
studentized Breusch-Pagan test data: model BP = 249.95, df = 8, p-value < 2.2e-16
`geom_smooth()` using formula 'y ~ x'
studentized Breusch-Pagan test data: model BP = 250.47, df = 8, p-value < 2.2e-16
`geom_smooth()` using formula 'y ~ x'
studentized Breusch-Pagan test data: model BP = 250.37, df = 8, p-value < 2.2e-16
# Print minimun prediction in each dataset
print(paste("1:",min(dfs_1[[1]]$fit)))
print(paste("2:",min(dfs_1[[2]]$fit)))
print(paste("3:",min(dfs_1[[3]]$fit)))
print(paste("4:",min(dfs_1[[4]]$fit)))
print(paste("5:",min(dfs_1[[5]]$fit)))
[1] "1: 3.04925831315137" [1] "2: 2.85396290567098" [1] "3: 3.82577662938494" [1] "4: 3.04276043266461" [1] "5: 3.83326986467252"
# Generate models for each imputed dataset
ra0 <- with(dfs, expr = lm(KALTMIETE_M2 ~ NEBENKOSTEN_M2 + NEBENKOSTEN_SQUARED + HEIZUNGSKOSTEN_M2 + ZIMMER +
+ WOHNFLAECHE + WOHNFLAECHE_SQUARED + WOHNFLAECHE_CUBIC + Neubau + Zentralheizung
+ Gas + Fernwärme))
ra1 <- with(dfs, expr = lm(KALTMIETE_M2 ~ NEBENKOSTEN_M2 + NEBENKOSTEN_SQUARED + HEIZUNGSKOSTEN_M2
+ WOHNFLAECHE + WOHNFLAECHE_SQUARED + WOHNFLAECHE_CUBIC + Neubau + Zentralheizung
+ Gas))
ra <- with(dfs, expr = lm(KALTMIETE_M2 ~ NEBENKOSTEN_M2 + NEBENKOSTEN_SQUARED
+ WOHNFLAECHE +WOHNFLAECHE_SQUARED + WOHNFLAECHE_CUBIC + Neubau + Zentralheizung
+ Gas))
# Build a list of models
models <- list( pool(ra0),pool(ra1),pool(ra))
# Generates model summary table
modelsummary(models, fmt = "%.4f",estimate = "{estimate}{stars}", coef_rename = c("NEBENKOSTEN_M2" = "Utility Costs",
"NEBENKOSTEN_SQUARED"="Utility Costs Squared",
"HEIZUNGSKOSTEN_M2" = "Heating Costs",
"ZIMMER" = "Rooms Number","WOHNFLAECHE" = "Living Area",
"WOHNFLAECHE_SQUARED" = "Living Area Squared",
"WOHNFLAECHE_CUBIC" = "Cubic Living Area",
"Neubau" = "New Construction",
"Zentralheizung1" = "Central Heating",
"Gas1" = "Gas", "Fernwärme1"= "District Heating"),output="jupyter")
| Model 1 | Model 2 | Model 3 | |
|---|---|---|---|
| (Intercept) | 18.0497*** | 17.9116*** | 17.9893*** |
| (1.4580) | (1.3880) | (1.1918) | |
| Utility Costs | −1.1715* | −1.1510* | −1.1463* |
| (0.4534) | (0.4514) | (0.4619) | |
| Utility Costs Squared | 0.3584*** | 0.3557*** | 0.3559*** |
| (0.0685) | (0.0681) | (0.0673) | |
| Heating Costs | 0.0420 | 0.0498 | |
| (0.2622) | (0.2622) | ||
| Rooms Number | −0.0646 | ||
| (0.1798) | |||
| Living Area | −0.2042*** | −0.2081*** | −0.2085*** |
| (0.0381) | (0.0363) | (0.0355) | |
| Living Area Squared | 0.0024*** | 0.0024*** | 0.0024*** |
| (0.0004) | (0.0004) | (0.0004) | |
| Cubic Living Area | −0.0000*** | −0.0000*** | −0.0000*** |
| (0.0000) | (0.0000) | (0.0000) | |
| New Construction | 4.8587*** | 4.8369*** | 4.8347*** |
| (0.2689) | (0.2697) | (0.2723) | |
| Central Heating | −1.5017* | −1.4968* | −1.4933* |
| (0.4493) | (0.4506) | (0.4473) | |
| Gas | 0.7029* | 0.8204** | 0.8223*** |
| (0.3349) | (0.2313) | (0.2280) | |
| District Heating | −0.1639 | ||
| (0.3081) | |||
| Num.Obs. | 2950 | 2950 | 2950 |
| Num.Imp. | 5 | 5 | 5 |
| R2 | 0.295 | 0.295 | 0.295 |
| R2 Adj. | 0.293 | 0.293 | 0.293 |
| AIC | 17607.2 | 17604.1 | 17604.3 |
| BIC | 17685.1 | 17670.0 | 17664.2 |
# Print the p-value of the overall models
print(paste("Model 1: ",glance_custom.mipo(models[[1]])$p.value))
print(paste("Model 2: ",glance_custom.mipo(models[[2]])$p.value))
print(paste("Model 3: ",glance_custom.mipo(models[[3]])$p.value))
[1] "Model 1: 4.15942745579809e-212" [1] "Model 2: 4.00052600130179e-214" [1] "Model 3: 4.63010050005655e-215"
# Pooling of the selected model
poolm <- pool(ra)
# -------- Deriving the full variance covariance matrix ------------------
# -------(Taken from a Thread of https://stackoverflow.com)---------------
# m different imputations
m <- poolm$m
# Vw, the within imputation covariance matrix
vw <- Reduce("+", lapply(ra$analyses, vcov)) / (m)
# Vb, the between imputation covariance matrix
# mice only provides the diagonal elements
bdiag <- poolm$pooled$b
# Calculate the full Vb matrix by hand
# Qbar, pooled parameter estimates
qbar <- getqbar(poolm)
# Qhats, each imputations parameter estimates
qhats <- sapply(ra$analyses, coef)
vb <- (1 / (m-1)) * (qhats - qbar) %*% t(qhats - qbar)
vt <- vw + (1 + 1 / (m)) * vb # this is t as it used to be
# Checking against the diagonal of t that is still provided
all.equal(as.numeric(diag(vt)), poolm$pooled$t)
# Convert covariance matrix to correlation matrix
corr <- solve(diag(sqrt(diag(vt)))) %*% vt %*% solve(diag(sqrt(diag(vt))))
varcorr <- cov2cor(vt)
# Corelation matrix of betha
corr_2
| (Intercept) | NEBENKOSTEN_M2 | NEBENKOSTEN_SQUARED | WOHNFLAECHE | WOHNFLAECHE_SQUARED | WOHNFLAECHE_CUBIC | BAUJAHR | BAUJAHR_SQUARED | Zentralheizung1 | Gas1 | |
|---|---|---|---|---|---|---|---|---|---|---|
| (Intercept) | 1.00000000 | -0.54908070 | 0.53507033 | -0.83604381 | 0.768735331 | -0.710737552 | 0.16217464 | 0.085159263 | 0.0866590 | -0.025662787 |
| NEBENKOSTEN_M2 | -0.54908070 | 1.00000000 | -0.96781602 | 0.12574763 | -0.096174855 | 0.088883890 | -0.33365539 | -0.190617526 | -0.4081307 | -0.187689625 |
| NEBENKOSTEN_SQUARED | 0.53507033 | -0.96781602 | 1.00000000 | -0.12995706 | 0.100751509 | -0.096018623 | 0.26936047 | 0.164507065 | 0.3063020 | 0.167908072 |
| WOHNFLAECHE | -0.83604381 | 0.12574763 | -0.12995706 | 1.00000000 | -0.978940358 | 0.933850390 | -0.09023771 | -0.045955357 | -0.1269911 | -0.024752816 |
| WOHNFLAECHE_SQUARED | 0.76873533 | -0.09617486 | 0.10075151 | -0.97894036 | 1.000000000 | -0.984328676 | 0.04858789 | 0.008966544 | 0.1242373 | 0.007872795 |
| WOHNFLAECHE_CUBIC | -0.71073755 | 0.08888389 | -0.09601862 | 0.93385039 | -0.984328676 | 1.000000000 | -0.03057400 | -0.001389355 | -0.1135257 | -0.002507977 |
| BAUJAHR | 0.16217464 | -0.33365539 | 0.26936047 | -0.09023771 | 0.048587892 | -0.030574003 | 1.00000000 | 0.872085220 | 0.5342747 | 0.287987085 |
| BAUJAHR_SQUARED | 0.08515926 | -0.19061753 | 0.16450706 | -0.04595536 | 0.008966544 | -0.001389355 | 0.87208522 | 1.000000000 | 0.2954733 | 0.094674245 |
| Zentralheizung1 | 0.08665900 | -0.40813070 | 0.30630202 | -0.12699108 | 0.124237252 | -0.113525681 | 0.53427467 | 0.295473310 | 1.0000000 | 0.437672310 |
| Gas1 | -0.02566279 | -0.18768962 | 0.16790807 | -0.02475282 | 0.007872795 | -0.002507977 | 0.28798708 | 0.094674245 | 0.4376723 | 1.000000000 |
# Generate Residuals and Effect Plots
dfs_1<- lapply(1:5, function(i) complete(dfs, action = i))
for (i in 1:5){
model<- lm(data=dfs_1[[i]],KALTMIETE_M2 ~ NEBENKOSTEN_M2 + ZIMMER
+ WOHNFLAECHE + WOHNFLAECHE_SQUARED + WOHNFLAECHE_CUBIC + Neubau + Zentralheizung
+ Gas + Fernwärme)
res <- resid(model)
dfs_1[[i]]$fit <- fitted(model)
#dfs_1[[i]]$res <- rstandard(model)
dfs_1[[i]]$Effect <- model$coefficients["(Intercept)"] + model$coefficients["WOHNFLAECHE"]*dfs_1[[i]]$WOHNFLAECHE + model$coefficients["WOHNFLAECHE_SQUARED"]* (dfs_1[[i]]$WOHNFLAECHE_SQUARED) + model$coefficients["WOHNFLAECHE_CUBIC"]*(dfs_1[[i]]$WOHNFLAECHE_CUBIC)
#produce residual vs. fitted plot
print(ggplot(data.frame(fitted(model),res),aes(fitted(model),res)) +
geom_point(alpha=0.6) + stat_smooth(method='loess')+ theme_bw()+
xlab('Predicted Values') + ylab('Residuals') +
theme(axis.title = element_text(face="bold")))
#plot effect of the living area
print(qplot(WOHNFLAECHE, Effect,data = dfs_1[[i]],
geom = c("point")) + theme_bw() +
ggtitle(substitute(paste(bold('Effect of the Living Area')))) + theme(plot.title = element_text(hjust = 0.5))
+ labs(x= expression(bold("Living Area in") ~bold(m^2)), y=substitute(paste(bold('Effect')))
))
print(bptest(model))
}
`geom_smooth()` using formula 'y ~ x'
studentized Breusch-Pagan test data: model BP = 183.84, df = 9, p-value < 2.2e-16
`geom_smooth()` using formula 'y ~ x'
studentized Breusch-Pagan test data: model BP = 157.99, df = 9, p-value < 2.2e-16
`geom_smooth()` using formula 'y ~ x'
studentized Breusch-Pagan test data: model BP = 184.86, df = 9, p-value < 2.2e-16
`geom_smooth()` using formula 'y ~ x'
studentized Breusch-Pagan test data: model BP = 175.26, df = 9, p-value < 2.2e-16
`geom_smooth()` using formula 'y ~ x'
studentized Breusch-Pagan test data: model BP = 185.98, df = 9, p-value < 2.2e-16
# Print minimun prediction in each dataset
print(paste("1:",min(dfs_1[[1]]$fit)))
print(paste("2:",min(dfs_1[[2]]$fit)))
print(paste("3:",min(dfs_1[[3]]$fit)))
print(paste("4:",min(dfs_1[[4]]$fit)))
print(paste("5:",min(dfs_1[[5]]$fit)))
[1] "1: 3.11682478490676" [1] "2: 2.55910213342987" [1] "3: 3.84904019682308" [1] "4: 1.73628279993135" [1] "5: 2.87823769049346"
# Generate Residuals and Effect Plots
dfs_1<- lapply(1:5, function(i) complete(dfs, action = i))
for (i in 1:5){
model1<- lm(data=dfs_1[[i]],KALTMIETE_M2 ~ NEBENKOSTEN_M2
+ WOHNFLAECHE + WOHNFLAECHE_SQUARED + WOHNFLAECHE_CUBIC + BAUJAHR + BAUJAHR_SQUARED + Zentralheizung
+ Gas)
model2<- lm(data=dfs_1[[i]],KALTMIETE_M2 ~ NEBENKOSTEN_M2 + ZIMMER
+ WOHNFLAECHE + WOHNFLAECHE_SQUARED + WOHNFLAECHE_CUBIC + Neubau + Zentralheizung
+ Gas + Fernwärme)
dfs_1[[i]]$Effect1 <- model1$coefficients["(Intercept)"] + model1$coefficients["BAUJAHR"]*dfs_1[[i]]$BAUJAHR + model1$coefficients["BAUJAHR_SQUARED"]*(dfs_1[[i]]$BAUJAHR_SQUARED)
dfs_1[[i]]$Effect2 <- ifelse(dfs_1[[i]]$Neubau==1,model2$coefficients["(Intercept)"] + model2$coefficients["Neubau"]*dfs_1[[i]]$Neubau,NA)
dfs_1[[i]]$Effect3 <- ifelse(dfs_1[[i]]$Neubau==0 & dfs_1[[i]]$BAUJAHR>=(-100),model2$coefficients["(Intercept)"] + model2$coefficients["Neubau"]*dfs_1[[i]]$Neubau,NA)
dfs_1[[i]]$Effect4 <- model1$coefficients["(Intercept)"] + model1$coefficients["WOHNFLAECHE"]*dfs_1[[i]]$WOHNFLAECHE + model1$coefficients["WOHNFLAECHE_SQUARED"]* (dfs_1[[i]]$WOHNFLAECHE_SQUARED) + model1$coefficients["WOHNFLAECHE_CUBIC"]*(dfs_1[[i]]$WOHNFLAECHE_CUBIC)
dfs_1[[i]]$Effect5 <- model2$coefficients["(Intercept)"] + model2$coefficients["WOHNFLAECHE"]*dfs_1[[i]]$WOHNFLAECHE + model2$coefficients["WOHNFLAECHE_SQUARED"]* (dfs_1[[i]]$WOHNFLAECHE_SQUARED) + model2$coefficients["WOHNFLAECHE_CUBIC"]*(dfs_1[[i]]$WOHNFLAECHE_CUBIC)
#plot effect of the year of construction
print(ggplot(data = dfs_1[[i]],aes(x=BAUJAHR+2000)) + theme_bw()
+ geom_line(aes(y = Effect2), color = "darkred")
+ geom_line(aes(y = Effect3), color = "darkred")
+ geom_line(aes(y = Effect1), color="steelblue", linetype="twodash")
+ geom_vline(aes(xintercept=2000), linetype=2, color="grey")
+ ggtitle(substitute(paste(bold('Effect of the Year of Construction')))) + theme(plot.title = element_text(hjust = 0.5))
+ labs(x= substitute(bold(paste('Year of Construction'))), y=substitute(bold(paste('Effect')))))
#plot effect of the living area
print(ggplot(data = dfs_1[[i]],aes(x=WOHNFLAECHE)) + theme_bw()
+ geom_line(aes(y = Effect4), color = "darkred")
+ geom_line(aes(y = Effect5), color="steelblue", linetype="twodash")
+ ggtitle(substitute(paste(bold('Effect of the Living Area')))) + theme(plot.title = element_text(hjust = 0.5))
+ labs(x= expression(bold("Living Area in") ~bold(m^2)), y=substitute(bold(paste('Effect')))))
}
Warning message: “Removed 1866 row(s) containing missing values (geom_path).” Warning message: “Removed 1159 row(s) containing missing values (geom_path).”
Warning message: “Removed 1868 row(s) containing missing values (geom_path).” Warning message: “Removed 1152 row(s) containing missing values (geom_path).”
Warning message: “Removed 1859 row(s) containing missing values (geom_path).” Warning message: “Removed 1162 row(s) containing missing values (geom_path).”
Warning message: “Removed 1885 row(s) containing missing values (geom_path).” Warning message: “Removed 1145 row(s) containing missing values (geom_path).”
Warning message: “Removed 1883 row(s) containing missing values (geom_path).” Warning message: “Removed 1142 row(s) containing missing values (geom_path).”